{
 "cells": [
  {
   "cell_type": "raw",
   "id": "98177d53",
   "metadata": {},
   "source": [
    "#泰坦尼克号幸存顾客预测，通过泰坦尼克号中乘客的数据来预测未来发生船难时哪些人可以幸存，哪些人不能幸存\n",
    "'PassengerId',ID\n",
    "'Survived',是否幸存，0,1\n",
    "'Pclass', 船舱等级\n",
    "'Name',姓名\n",
    "'Sex',性别\n",
    "'Age',年龄\n",
    "'SibSp',带亲属的数量(子女、兄妹)\n",
    "'Parch', 带亲属的数量(父母)\n",
    "'Ticket',船票编号\n",
    "'Fare', 船票价格\n",
    "'Embarked'登录港口"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f0457b74",
   "metadata": {},
   "source": [
    "1.读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2f02663b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f01d3bb8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1         0       3   \n",
       "1            2         1       1   \n",
       "2            3         1       3   \n",
       "3            4         1       1   \n",
       "4            5         0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500   NaN        S  \n",
       "1      0          PC 17599  71.2833   C85        C  \n",
       "2      0  STON/O2. 3101282   7.9250   NaN        S  \n",
       "3      0            113803  53.1000  C123        S  \n",
       "4      0            373450   8.0500   NaN        S  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('C:/Users/lenovo/BigData/PythonBase/titanic_trains.csv') #路径：绝对路径\n",
    "df = pd.read_csv('./titanic_trains.csv') #相对路径都支持,可以按快捷键tab补全路径\n",
    "df.head() #查看数据的前5行"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f24ebbdf",
   "metadata": {},
   "source": [
    "2缺失值的处理-删除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "19f9faca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PassengerId      0\n",
       "Survived         0\n",
       "Pclass           0\n",
       "Name             0\n",
       "Sex              0\n",
       "Age            177\n",
       "SibSp            0\n",
       "Parch            0\n",
       "Ticket           0\n",
       "Fare             0\n",
       "Cabin          687\n",
       "Embarked         2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 按列删除(遵循80%法则)如果一列的非缺失部分低于80%则可以考虑删除该列，也就是缺失部分超过20%\n",
    "#axis按列还是行删除 how全部删除还是部分删除 thresh(非缺失值部分数据是多少，891是数据行，0.8是非缺失值占比)，inplace参数表示是否对原始原始\n",
    "#变量进行修改，Ture修改有返回值，false表示不修改并且没有返回值\n",
    "# df_drop = df.dropna(axis='columns',how='any',thresh=891*0.8) #3.10版本不支持how和thresh参数同时出现\n",
    "df_drop = df.dropna(axis='columns',thresh=891*0.8,inplace=False) #axis也可以是1，0，1代表列，0代表行\n",
    "#删掉缺失值占比超过20%的列之后，再看占比情况\n",
    "df_drop.isnull().sum()/df.shape[0]*100\n",
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a3b650c4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PassengerId    0.0\n",
       "Survived       0.0\n",
       "Pclass         0.0\n",
       "Name           0.0\n",
       "Sex            0.0\n",
       "Age            0.0\n",
       "SibSp          0.0\n",
       "Parch          0.0\n",
       "Ticket         0.0\n",
       "Fare           0.0\n",
       "Embarked       0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#按行删除\n",
    "df_drop_row = df_drop.dropna(axis=0,how='any')#按行删除时不能指定thresh参数\n",
    "df_drop_row.isnull().sum()/891*100  #这样删完以后，整个df_drop_row一个缺失值都没有了"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "979bbc24",
   "metadata": {},
   "source": [
    "3.建模"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0f3e7222",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 712 entries, 0 to 890\n",
      "Data columns (total 11 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  712 non-null    int64  \n",
      " 1   Survived     712 non-null    int64  \n",
      " 2   Pclass       712 non-null    int64  \n",
      " 3   Name         712 non-null    object \n",
      " 4   Sex          712 non-null    object \n",
      " 5   Age          712 non-null    float64\n",
      " 6   SibSp        712 non-null    int64  \n",
      " 7   Parch        712 non-null    int64  \n",
      " 8   Ticket       712 non-null    object \n",
      " 9   Fare         712 non-null    float64\n",
      " 10  Embarked     712 non-null    object \n",
      "dtypes: float64(2), int64(5), object(4)\n",
      "memory usage: 66.8+ KB\n"
     ]
    }
   ],
   "source": [
    "df_drop_row.info()  #这里面的一些列是object类型，非数值类型，会影响我们建模计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d73df363",
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "could not convert string to float: 'Braund, Mr. Owen Harris'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[9], line 5\u001b[0m\n\u001b[0;32m      3\u001b[0m model \u001b[38;5;241m=\u001b[39m DecisionTreeClassifier()\n\u001b[0;32m      4\u001b[0m \u001b[38;5;66;03m#会报错：ValueError: could not convert string to float: 'Braund, Mr. Owen Harris'    说明非数值类型会影响建模，具体在文档‘7特征编码文件中d’\u001b[39;00m\n\u001b[1;32m----> 5\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_drop_row\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSurvived\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43mdf_drop_row\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSurvived\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mD:\\soft\\anaconda\\lib\\site-packages\\sklearn\\tree\\_classes.py:889\u001b[0m, in \u001b[0;36mDecisionTreeClassifier.fit\u001b[1;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[0;32m    859\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfit\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, y, sample_weight\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, check_input\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m    860\u001b[0m     \u001b[38;5;124;03m\"\"\"Build a decision tree classifier from the training set (X, y).\u001b[39;00m\n\u001b[0;32m    861\u001b[0m \n\u001b[0;32m    862\u001b[0m \u001b[38;5;124;03m    Parameters\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    886\u001b[0m \u001b[38;5;124;03m        Fitted estimator.\u001b[39;00m\n\u001b[0;32m    887\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m--> 889\u001b[0m     \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    890\u001b[0m \u001b[43m        \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    891\u001b[0m \u001b[43m        \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    892\u001b[0m \u001b[43m        \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    893\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_input\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    894\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    895\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n",
      "File \u001b[1;32mD:\\soft\\anaconda\\lib\\site-packages\\sklearn\\tree\\_classes.py:186\u001b[0m, in \u001b[0;36mBaseDecisionTree.fit\u001b[1;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[0;32m    184\u001b[0m check_X_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(dtype\u001b[38;5;241m=\u001b[39mDTYPE, accept_sparse\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcsc\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    185\u001b[0m check_y_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(ensure_2d\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m--> 186\u001b[0m X, y \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    187\u001b[0m \u001b[43m    \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalidate_separately\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcheck_X_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck_y_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    188\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    189\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m issparse(X):\n\u001b[0;32m    190\u001b[0m     X\u001b[38;5;241m.\u001b[39msort_indices()\n",
      "File \u001b[1;32mD:\\soft\\anaconda\\lib\\site-packages\\sklearn\\base.py:560\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[1;34m(self, X, y, reset, validate_separately, **check_params)\u001b[0m\n\u001b[0;32m    558\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mestimator\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m check_X_params:\n\u001b[0;32m    559\u001b[0m     check_X_params \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mdefault_check_params, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_X_params}\n\u001b[1;32m--> 560\u001b[0m X \u001b[38;5;241m=\u001b[39m check_array(X, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_X_params)\n\u001b[0;32m    561\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mestimator\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m check_y_params:\n\u001b[0;32m    562\u001b[0m     check_y_params \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mdefault_check_params, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_y_params}\n",
      "File \u001b[1;32mD:\\soft\\anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:879\u001b[0m, in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[0;32m    877\u001b[0m         array \u001b[38;5;241m=\u001b[39m xp\u001b[38;5;241m.\u001b[39mastype(array, dtype, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m    878\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 879\u001b[0m         array \u001b[38;5;241m=\u001b[39m \u001b[43m_asarray_with_order\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mxp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mxp\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    880\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ComplexWarning \u001b[38;5;28;01mas\u001b[39;00m complex_warning:\n\u001b[0;32m    881\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m    882\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mComplex data not supported\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(array)\n\u001b[0;32m    883\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcomplex_warning\u001b[39;00m\n",
      "File \u001b[1;32mD:\\soft\\anaconda\\lib\\site-packages\\sklearn\\utils\\_array_api.py:185\u001b[0m, in \u001b[0;36m_asarray_with_order\u001b[1;34m(array, dtype, order, copy, xp)\u001b[0m\n\u001b[0;32m    182\u001b[0m     xp, _ \u001b[38;5;241m=\u001b[39m get_namespace(array)\n\u001b[0;32m    183\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m xp\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnumpy\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnumpy.array_api\u001b[39m\u001b[38;5;124m\"\u001b[39m}:\n\u001b[0;32m    184\u001b[0m     \u001b[38;5;66;03m# Use NumPy API to support order\u001b[39;00m\n\u001b[1;32m--> 185\u001b[0m     array \u001b[38;5;241m=\u001b[39m \u001b[43mnumpy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    186\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m xp\u001b[38;5;241m.\u001b[39masarray(array, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[0;32m    187\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
      "File \u001b[1;32mD:\\soft\\anaconda\\lib\\site-packages\\pandas\\core\\generic.py:2070\u001b[0m, in \u001b[0;36mNDFrame.__array__\u001b[1;34m(self, dtype)\u001b[0m\n\u001b[0;32m   2069\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__array__\u001b[39m(\u001b[38;5;28mself\u001b[39m, dtype: npt\u001b[38;5;241m.\u001b[39mDTypeLike \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[1;32m-> 2070\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_values\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'Braund, Mr. Owen Harris'"
     ]
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier   #决策树类型\n",
    "\n",
    "model = DecisionTreeClassifier()\n",
    "#会报错：ValueError: could not convert string to float: 'Braund, Mr. Owen Harris'    说明非数值类型会影响建模，具体在文档‘7特征编码文件中d’\n",
    "model.fit(df_drop_row.drop(axis=1,columns='Survived'),df_drop_row['Survived'])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
